/***
This figure compares the industry composition of spending in QSS, Affinity and Womply. 
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------
* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "Spending"

* Create required subfolders
cap mkdir "${root}/results/`category'"

*-------------------------------------------------------------------------------
* Step 1: Clean QSS Data
*-------------------------------------------------------------------------------
* Import excel sheet
project, uses("${root}/data/dvc/QSS/qssq1-20advpr.xls")
import excel using "${root}/data/dvc/QSS/qssq1-20advpr.xls", sheet("tableA1-2020Q1") ///
		cellrange(A6:I78) clear
keep A B C D 
destring C D, replace force 
drop if mi(A) & mi(C) & mi(D)
rename (*) (naicscode description q1_2020 q4_2019)

* only keep 2-digit code for industry benchmarking
keep if strlen(naicscode) == 2 |  strlen(naicscode) == 5 | naicscode == "721"
tempfile qss 
save `qss'

*-------------------------------------------------------------------------------
* Step 2: Clean Affinity
*-------------------------------------------------------------------------------

project, uses("${root}/data/derived/Affinity/national_day_mcc.dta")
use "${root}/data/derived/Affinity/national_day_mcc.dta", clear

* check file structure
isid date mcc 

rename mcc mcc_numeric
project, uses("${root}/data/dvc/Crosswalks/mcc_naics_cw.dta")
merge m:1 mcc_numeric using "${root}/data/dvc/Crosswalks/mcc_naics_cw.dta", nogen keepusing(NAICS*)
drop if mi(NAICS_id) | mi(date)

* collapse to date x NAICS_id 
gcollapse (sum) total_spend, by(date NAICS_id)

* check file structure 
isid date NAICS_id

* Only keep industries we can match to QSS
gen naicscode = substr(NAICS_id,1,2)
replace naicscode = "48-49" if inlist(naicscode, "48", "49")
replace naicscode = "44-45" if inlist(naicscode, "44", "45")
replace naicscode = "721" if substr(NAICS_id,1,3) == "721"
	
* collapse to 2-digit NAICS codes in Q1 2020
keep if year(date) == 2020 & inrange(month(date), 1, 3)
gcollapse (sum) total_spend, by(naicscode)

rename (total*) (affinity*)
		
tempfile affinity
save `affinity'

*-------------------------------------------------------------------------------
* Step 3: Clean Womply and Plot
*-------------------------------------------------------------------------------
project, uses("${root}/data/dvc/Industry Shares of Consumer Spending and Business Revenues/womply subcategory x monthly 2020.dta")
use "${root}/data/dvc/Industry Shares of Consumer Spending and Business Revenues/womply subcategory x monthly 2020.dta", clear

* merge onto subcategory NAICS 2-digit and 3-digit
project, uses("${root}/data/dvc/Crosswalks/crosswalk_naics_womply_subcategories_3_digit.dta")
merge m:1 womplysubcategory using "${root}/data/dvc/Crosswalks/crosswalk_naics_womply_subcategories_3_digit.dta", nogen 
keep if inrange(month, 1, 3)
replace naics_2 = "721" if naics_3 ==  "721"

gcollapse (sum) total*, by(naics_2)
rename naics* naicscode
rename (total*) (womply*)

* merge affinity 
merge 1:1 naicscode using `affinity', nogen 

* merge QSS 
merge 1:1 naicscode using `qss', nogen
keep if !mi(q1_2020)
rename q1_2020 qss
rename naicscode naics 

* generate shares out of QSS services
foreach var in womply_net_revenue affinity_spend qss {
	gegen `var'_total = total(`var')
	gen `var'_share = 100*(`var' / `var'_total)
} 

*Shorten descriptions 
gen short = description
replace short = "Natural Resources" if naics == "11"
replace short = "Mining" if naics == "21"
replace short = "Utilities" if naics == "22"
replace short = "Trans. & Warehousing" if naics == "48-49"
replace short = "Information" if naics == "51"
replace short = "Finance & Insurance" if naics == "52"
replace short = "Rental & Leasing" if naics == "53"
replace short = "Prof. Services" if naics == "54"
replace short = "Management" if naics == "55"
replace short = "Admin Support" if naics == "56"
replace short = "Education" if naics == "61"
replace short = "Health & Soc. Assist." if naics == "62"
replace short = "Arts, Entmt., & Rec" if naics == "71"
replace short = "Accom. & Food Services" if naics == "721"
replace short = "Other Services" if naics == "81"

graph hbar qss_share affinity_spend_share womply_net_revenue_share , ///
	over(short, sort(qss_share) descending label(ticks labsize(small)) gap(*3)) ///
	legend(order(1 "QSS" 2 "Affinity" 3 "Womply") ring(0) pos(5) col(1) size(2.5)) ///
	ytitle("Percent of Total Service Revenue in Q1 2020 (%)", size(2)) ///
	ylabel(0 "0%" 10 "10%" 20 "20%" 30 "30%", nogrid labsize(small)) ///
	${title_`version'} ///
	ysize(8) 
	
oi_graph_export "${root}/results/Spending/QSS, Affinity, Womply Industry Spending Shares", type(${fig_type})
